# NOTE Edit config.py to get num_recs=None, else you'll be restricted to ≤200 recs/sp
# - Want: countries_k='na', com_names_k='us', num_recs=None,
# - Not: countries_k='na', com_names_k='us', num_recs=200,
from notebooks import *
# Initialize sg first: the attribute read below relies on init having run
sg.init(None) # Computes search_recs, if cache miss
# Module-level handle on the recs table that every query below starts from
search_recs = sg.search_recs
# Utils
def _recs_show(
    df,
    scale=None,
    order=None,
    drop=None,
    astype=None,
    replace=None,
    **kwargs,
):
    """
    Prepare a recs frame for display: add the spectro column, then arrange, drop, cast, and tidy columns.

    Args:
        df: Recs frame to show (e.g. a filtered/sorted slice of search_recs).
        scale: Forwarded as scale= to recs_featurize_spectro_disp.
        order: Columns passed to recs_view_cols (append=) and df_reorder_cols (last=).
            Defaults to the xc_id/spectro_disp/species/... list below.
        drop: Columns dropped from the result. Defaults to license, bird_seen, playback_used.
        astype: Mapping passed to DataFrame.astype. Defaults to {'year': int}.
        replace: Mapping passed to DataFrame.replace. Defaults to {'subspecies': {'': '—'}}.
        **kwargs: Accepted but not used by this function.

    Returns:
        The frame produced by the chain below.
    """
    # Build defaults per call instead of in the signature: mutable defaults are shared across calls, and
    # these values are handed to helpers defined outside this file
    if order is None:
        order = [
            'xc_id', 'spectro_disp',
            'species', 'subspecies',
            'year', 'month_day', 'time',
            'type',
            'state', 'elevation', 'lat', 'lng', 'place',
            'remarks',
            'quality', 'recordist', 'background_species', 'date',
        ]
    if drop is None:
        drop = ['license', 'bird_seen', 'playback_used']
    if astype is None:
        astype = {'year': int}
    if replace is None:
        replace = {'subspecies': {'': '—'}}
    return (df
        # Drop any indexes (e.g. from filter/sort)
        .reset_index(drop=True)
        # Featurize
        .pipe(recs_featurize_spectro_disp, scale=scale)
        # View
        .pipe(recs_view_cols, append=order)  # append= to ensure all requested cols are included
        .pipe(df_reorder_cols, last=order)  # last= so that unknown cols show up loudly in the front
        .drop(columns=drop)
        .astype(astype)
        .replace(replace)
    )
# XXX Very slow to run over all 45k recs, do lazily instead
# recs = (search_recs
# .pipe(df_inspect, lambda df: (df.shape,))
# .pipe(_recs_show)
# .pipe(df_inspect, lambda df: (df.shape,))
# )
# Copy selected audio/spectro files to talk dir
# Paths are identical for every rec, so resolve them once instead of on each iteration
# - At module level these names still reach shell() via **locals(), same as when they were set inside the loop
# - NOTE(review): payload_id pins num_recs(200), while the note at the top of the file asks for num_recs=None
#   -- confirm this is the payload you want to copy from
payload_id = 'search_recs-audio_s(10),version(8),com_names_k(us),countries_k(na),num_recs(200)-4a07700'
search_recs_dir = 'data/cache/payloads/%(payload_id)s/mobile-version(2)/search_recs' % dict(payload_id=payload_id)
talk_dir = os.path.expanduser('~/Desktop/mb-talk-data')
# cp fails when the destination dir is missing, so create it up front (no-op if it already exists)
os.makedirs(talk_dir, exist_ok=True)
for sp, xc_id, desc in [
    ('WREN', 90158, 'male-and-female'),
    ('PSFL', 348155, 'dawn-song'),
    ('PSFL', 348142, 'song'),
]:
    # One audio (.mp4) and one spectro (.png) per rec, renamed to <sp>-<xc_id>-<desc>
    # - The %(name)s placeholders in cmd are presumably filled by shell() from the keyword args -- confirm
    shell(**locals(), cmd='''
cp -v %(search_recs_dir)s/audio/%(sp)s/audio-%(sp)s-%(xc_id)s.mp4 %(talk_dir)s/%(sp)s-%(xc_id)s-%(desc)s.mp4
cp -v %(search_recs_dir)s/spectro/%(sp)s/spectro-%(sp)s-%(xc_id)s.png %(talk_dir)s/%(sp)s-%(xc_id)s-%(desc)s.png
''')
# WREN
# - [Pie19] p365
# - Male song, female song, duet [verify]
# - [PFGBS] https://academy.allaboutbirds.org/peterson-field-guide-to-bird-sounds/?speciesCode=wrenti
# - [BNA] https://birdsna.org/Species-Account/bna/species/wrenti/sounds
# - "A long (2–5 s) series of short, clear, overslurred whistles at about 4 kHz, accelerating into a rapid
# chatter."
# - "Female song is similar, but with an even rhythm, and with a highly variable number of notes (3–14)."
# - "Number of introductory notes is variable, but typically 3–5 in the male song, and the quality of those notes
# can vary with increasing agitation."
# - "Partial male songs are common and sound identical to female songs."
# - Recs
# - WREN/90158 male and female (duet)
(
    search_recs
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Filter: just the WREN duet rec
    [lambda recs: (
        (recs.species == 'WREN')
        # Remarks-based filters, tried and found not useful:
        # & recs.remarks.str.lower().str.contains('female')
        # & recs.remarks.str.lower().str.contains('slow')
        # & recs.remarks.str.lower().str.contains('male')
        & recs.xc_id.isin([90158])
    )]
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Sort: newest first
    .sort_values(by='date', ascending=False)
    # View
    .pipe(_recs_show, scale=3)
    .head(500)
)
# RTHA
# - [Pie19] p205
# - Squeal: "Given by immature birds in their first summer and fall... May transition gradually into adult Scream
# over course of first fall and winter; more study needed."
# - Recs
# - RTHA/187482 - squeal (juv) - clean
# - RTHA/139200 - squeal (juv) - messy
# - RTHA/65409 - scream (adult) - clean
# - RTHA/173877 - transitional squeal/scream [good]
# - RTHA/173880 - transitional squeal/scream [good, but too quiet]
(
    search_recs
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Filter: the hand-picked RTHA squeal/scream recs
    [lambda recs: (
        (recs.species == 'RTHA')
        & recs.xc_id.isin([187482, 139200, 65409, 173877, 173880])
    )]
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Sort: none applied; alternatives tried:
    # .sample(frac=1, random_state=0)
    # [lambda df: ~df.subspecies.isin([''])].sort_values(['subspecies'], ascending=[True])
    # .sort_values('month_day')
    # View (untrimmed; a [:100] cap was tried and disabled)
    .pipe(_recs_show, scale=3)
)
# PSFL: dawn song vs. normal song
# - [Pie19] p272
# - Dawn song, normal song
# - (No "dawn" at [PFGBS] https://academy.allaboutbirds.org/peterson-field-guide-to-bird-sounds/?speciesCode=pasfly)
# - [BNA] https://birdsna.org/Species-Account/bna/species/pasfly/sounds
# - Same as [Pie19]
# - Recs
# - PSFL/348155 dawn song
# - PSFL/348142 song
(
    search_recs
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Filter: the two PSFL recs (dawn song, song), restricted to recordists matching 'webster'
    [lambda recs: (
        (recs.species == 'PSFL')
        & recs.recordist.str.lower().str.match(r'.*webster.*')
        & recs.xc_id.isin([348155, 348142])
    )]
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Sort: by time of day; alternatives tried:
    # .sort_values(['subspecies', 'lat'], ascending=[True, False])
    # .sort_values(['lat'], ascending=False)
    # .sort_values(['lng'], ascending=True)
    .sort_values(by=['time'], ascending=True)
    # View
    .pipe(_recs_show, scale=3)
    .head(500)
)
# HUVI
# - [Pie19] p297
# - "Phrases vary tremendously"
# - "Incessant repetition of the same sound can be an excellent field mark, but singing occasionally deviates from
# this pattern"
# - [BNA] https://birdsna.org/Species-Account/bna/species/hutvir/sounds
# - "Much individual and geographic variation"
# - "Typical song pattern is a monotonous and unmusical series of nasal and wheezy 2-syllable, ascending phrases
# ... repeated approximately 1 per second for a period sometimes spanning ≥ 10 min"
# - "A common variation is composed of descending phrases"
# - "A bird may sing either form, may alternate between the 2 forms, and occasionally may mix ascending and
# descending phrases in the same series ... producing a pattern reminiscent of the song of Cassin's Vireo"
(
    search_recs
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Filter: all HUVI recs
    [lambda recs: recs.species == 'HUVI']
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Sort: seeded shuffle, so the 25 shown are a repeatable random pick
    .sample(frac=1, random_state=2)
    # View
    .pipe(_recs_show, scale=3)
    .head(25)
)
def _show_seasonal(
    species,
    filters=lambda df: [],
    bins=2,  # 2 | 4
    n=None,
    scale=None,
    scale_base=3.8,
    random_state=0,
):
    """
    Show one species' spectros side by side, one column per season bin.

    Args:
        species: Value matched against the species column of search_recs.
        filters: Callable df -> list of extra boolean masks, each and-ed with the species mask.
        bins: 4 for fall migration / winter / spring migration / breeding, or 2 to merge them into halves.
        n: Max rows in the result; defaults to 50 // bins.
        scale: Spectro scale passed to _recs_show; scale_base / bins is used when this is falsy.
        scale_base: Base for the derived scale.
        random_state: Seed for the per-column shuffle.

    Returns:
        Frame with one column of spectro_disp values per season bin, missing cells filled with '', trimmed to n rows.
    """
    assert bins in [2, 4]
    if n is None:
        n = 50 // bins

    # Month (the part of month_day before the first '-') -> season (4 bins)
    # - Boundaries based roughly on https://birdsna.org/Species-Account/bna/species/ruckin/breeding
    season_by_month = {
        '09': 'fall migration',
        '10': 'fall migration',
        '11': 'fall migration',
        '12': 'winter',
        '01': 'winter',
        '02': 'winter',
        '03': 'spring migration',
        '04': 'spring migration',
        '05': 'spring migration',
        '06': 'breeding',
        '07': 'breeding',
        '08': 'breeding',
    }
    seasons_4 = ['fall migration', 'winter', 'spring migration', 'breeding']
    # 4 -> 2 bins, for bigger spectros
    merged_season = {
        'spring migration': 'spring migration / breeding',
        'breeding': 'spring migration / breeding',
        'fall migration': 'fall migration / winter',
        'winter': 'fall migration / winter',
    }
    seasons_2 = ['fall migration / winter', 'spring migration / breeding']

    def matches(df):
        # Species mask and-ed with each caller-supplied mask, in order
        mask = df.species == species
        for extra in filters(df):
            mask = mask & extra
        return mask

    recs = (search_recs
        .pipe(df_inspect, lambda df: (df.shape,))
        # Filter
        [matches]
        .pipe(df_inspect, lambda df: (df.shape,))
        # Sort
        .sort_values(['month_day'], ascending=[False])
        # View
        .pipe(_recs_show, scale=scale or scale_base / bins)
        [:1000]  # Safeguard: trim to a reasonable max (> any sp, but << all recs)
    )

    # Bin by season (4 bins), then optionally merge down to 2
    months = recs.month_day.str.split('-').str[0]
    recs = recs.assign(season=months.map(lambda month: season_by_month.get(month)))
    recs = recs.assign(season=as_ordered_cat(recs.season, seasons_4))
    if bins != 4:
        recs = recs.replace({'season': merged_season})
        recs = recs.assign(season=as_ordered_cat(recs.season, seasons_2))

    # Pivot by season (manually): one independently shuffled spectro column per season
    season_cols = [
        (recs
            [recs.season == season]
            [['spectro_disp']]
            .rename(columns={'spectro_disp': season})
            .sample(frac=1, random_state=random_state)  # Randomize per column (avoid biases from the sort above)
            .reset_index(drop=True)
        )
        for season in recs.season.sort_values().unique()
    ]
    return (pd.concat(season_cols, axis=1)
        .fillna('')
        .pipe(df_inspect, lambda df: (df.shape,))
        [:n]
        .pipe(df_inspect, lambda df: (df.shape,))
    )
# Good: visually apparent
_show_seasonal('WIWA')
# Good
# - Take a moment to explain wraa calls on the left vs. trill songs on the right
_show_seasonal('SPTO')
# Good
# - Take a moment to explain chips on the left vs. songs on the right
_show_seasonal('FOSP')  # Was show_seasonal(...): no such name is defined in view; every sibling call uses _show_seasonal
# [Skippable]
# Good example of partial switch
# - [Pie19] "All year, but mostly Mar—Aug"
_show_seasonal('SOSP')
# NOTE This helper is also defined earlier in the file; being later, this definition is the one that wins
def _show_seasonal(
    species,
    filters=lambda df: [],
    bins=2,  # 2 | 4
    n=None,
    scale=None,
    scale_base=3.8,
    random_state=0,
):
    """
    Show one species' spectros side by side, one column per season bin.

    Args:
        species: Value matched against the species column of search_recs.
        filters: Callable df -> list of extra boolean masks, each and-ed with the species mask.
        bins: 4 for fall migration / winter / spring migration / breeding, or 2 to merge them into halves.
        n: Max rows in the result; defaults to 50 // bins.
        scale: Spectro scale passed to _recs_show; scale_base / bins is used when this is falsy.
        scale_base: Base for the derived scale.
        random_state: Seed for the per-column shuffle.

    Returns:
        Frame with one column of spectro_disp values per season bin, missing cells filled with '', trimmed to n rows.
    """
    assert bins in [2, 4]
    if n is None:
        n = 50 // bins

    # Month (the part of month_day before the first '-') -> season (4 bins)
    # - Boundaries based roughly on https://birdsna.org/Species-Account/bna/species/ruckin/breeding
    season_by_month = {
        '09': 'fall migration',
        '10': 'fall migration',
        '11': 'fall migration',
        '12': 'winter',
        '01': 'winter',
        '02': 'winter',
        '03': 'spring migration',
        '04': 'spring migration',
        '05': 'spring migration',
        '06': 'breeding',
        '07': 'breeding',
        '08': 'breeding',
    }
    seasons_4 = ['fall migration', 'winter', 'spring migration', 'breeding']
    # 4 -> 2 bins, for bigger spectros
    merged_season = {
        'spring migration': 'spring migration / breeding',
        'breeding': 'spring migration / breeding',
        'fall migration': 'fall migration / winter',
        'winter': 'fall migration / winter',
    }
    seasons_2 = ['fall migration / winter', 'spring migration / breeding']

    def matches(df):
        # Species mask and-ed with each caller-supplied mask, in order
        mask = df.species == species
        for extra in filters(df):
            mask = mask & extra
        return mask

    recs = (search_recs
        .pipe(df_inspect, lambda df: (df.shape,))
        # Filter
        [matches]
        .pipe(df_inspect, lambda df: (df.shape,))
        # Sort
        .sort_values(['month_day'], ascending=[False])
        # View
        .pipe(_recs_show, scale=scale or scale_base / bins)
        [:1000]  # Safeguard: trim to a reasonable max (> any sp, but << all recs)
    )

    # Bin by season (4 bins), then optionally merge down to 2
    months = recs.month_day.str.split('-').str[0]
    recs = recs.assign(season=months.map(lambda month: season_by_month.get(month)))
    recs = recs.assign(season=as_ordered_cat(recs.season, seasons_4))
    if bins != 4:
        recs = recs.replace({'season': merged_season})
        recs = recs.assign(season=as_ordered_cat(recs.season, seasons_2))

    # Pivot by season (manually): one independently shuffled spectro column per season
    season_cols = [
        (recs
            [recs.season == season]
            [['spectro_disp']]
            .rename(columns={'spectro_disp': season})
            .sample(frac=1, random_state=random_state)  # Randomize per column (avoid biases from the sort above)
            .reset_index(drop=True)
        )
        for season in recs.season.sort_values().unique()
    ]
    return (pd.concat(season_cols, axis=1)
        .fillna('')
        .pipe(df_inspect, lambda df: (df.shape,))
        [:n]
        .pipe(df_inspect, lambda df: (df.shape,))
    )
# SPTO
# - [Pie19] p425
# - "Typical version: 2 unmusical series or trills, second higher"
# - "West Coast version: single long buzz"
# - [BNA] https://birdsna.org/Species-Account/bna/species/spotow/sounds
# - "Interior males sing songs generally containing both introductory and trill phrases, while coastal birds usually
# sing only trills."
# - Far western (coastal) birds trill faster on average than interior (mountain, plains) western birds, but trill
# rates are variable and largely overlapping (15.4–32.3 syll/s vs. 14.5–21.3 syll/s)
# - [BNA] https://birdsna.org/Species-Account/bna/species/spotow/systematics
# - 21 subsp (see map)
# - Various isolated subsp in the southern end of range (MX)
(
    search_recs
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Filter: good-quality SPTO recs
    [lambda recs: (
        (recs.species == 'SPTO')
        # Song-only filters, tried and disabled:
        # & recs.type.str.lower().str.match(r'.*song.*')
        # & recs.type.str.lower().str.match(r'^song$')
        & recs.quality.isin(['A', 'B'])
    )]
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Sort: none applied; alternatives tried:
    # .sample(250, random_state=0)
    # .sample(50, random_state=0)
    # .sort_values(['subspecies', 'lat'], ascending=False)
    # .sort_values(['lat'], ascending=False)
    # .sort_values(['lng'], ascending=True)
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # View
    .pipe(_recs_show, scale=1.9)
    # Bin by geo (2 bins), using longitude only
    # - An earlier, disabled approach parsed df.state (lowercased, '.' stripped, split on commas) and mapped
    #   state names to bins: first west coast (california/ca) vs. interior (arizona/az/new mexico), then
    #   california vs. colorado; it was "very rough, but decent results"
    .assign(geo=lambda recs: df_map_rows(recs, lambda row: (
        # -117 to exclude Inyo County (Death Valley) which behaves ~like interior
        'W -117 (coastal CA)' if row.lng < -117 else
        # -110 to include CO/AZ/MX (at least)
        'E -110 (CO/AZ/MX)' if row.lng > -110 else
        # Throw out everything in the middle, to simplify analysis
        None
    )))
    [lambda recs: recs.geo.notnull()]
    .assign(geo=lambda recs: as_ordered_cat(recs.geo, [
        # Orderings used with the earlier state-name binning:
        # 'west coast', 'interior',
        # 'california', 'colorado', 'arizona', 'mexico',
        # 'california', 'colorado',
        'W -117 (coastal CA)', 'E -110 (CO/AZ/MX)',
    ]))
    # .pipe(df_inspect, lambda df: df[:250]) # Debug
    # Pivot by geo (manually): one independently shuffled spectro column per geo bin
    .pipe(lambda recs: pd.concat(
        [
            (recs
                [recs.geo == geo]
                [['spectro_disp']]
                .rename(columns={'spectro_disp': geo})
                .sample(frac=1, random_state=0)  # Randomize per column (avoid biases from incidental sorting above)
                .reset_index(drop=True)
            )
            for geo in recs.geo.sort_values().unique()
        ],
        axis=1,
    ))
    .fillna('')
    .pipe(df_inspect, lambda recs: (recs.shape,))
    .head(25)
    .pipe(df_inspect, lambda recs: (recs.shape,))
)
# To search for examples
# (search_recs
# .pipe(df_inspect, lambda df: (df.shape,))
# # Filter
# [lambda df: reduce(lambda x, y: x & y, [
# df.species == 'GGSH',
# ])]
# .pipe(df_inspect, lambda df: (df.shape,))
# # Sort
# .sample(frac=1, random_state=0)
# # View
# .pipe(_recs_show,
# # scale=3,
# )
# [:250]
# )
(
    search_recs
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Filter: one hand-picked rec per species (commented lines are alternates for the species above them)
    [lambda recs: (
        ((recs.species == 'SPTO') & recs.xc_id.isin([127012]))
        | ((recs.species == 'EATO') & recs.xc_id.isin([293823]))
        | ((recs.species == 'HUVI') & recs.xc_id.isin([297120]))
        # | ((recs.species == 'HUVI') & recs.xc_id.isin([348987]))
        | ((recs.species == 'WAVI') & recs.xc_id.isin([159366]))
        # | ((recs.species == 'WAVI') & recs.xc_id.isin([381527]))
        | ((recs.species == 'HETH') & recs.xc_id.isin([314303]))
        # | ((recs.species == 'HETH') & recs.xc_id.isin([131636]))
        | ((recs.species == 'BEWR') & recs.xc_id.isin([163209]))
        # | ((recs.species == 'BEWR') & recs.xc_id.isin([141349]))
        | ((recs.species == 'HOWR') & recs.xc_id.isin([265810]))
        | ((recs.species == 'BANO') & recs.xc_id.isin([294969]))  # Juv shriek [PFGBS]
        | ((recs.species == 'GHOW') & recs.xc_id.isin([154990]))  # Juv shriek [PFGBS]
        | ((recs.species == 'BGGN') & recs.xc_id.isin([376229]))
        # | ((recs.species == 'BGGN') & recs.xc_id.isin([81059]))
        | ((recs.species == 'BCGN') & recs.xc_id.isin([30087]))
        | ((recs.species == 'BTGN') & recs.xc_id.isin([253889]))
        | ((recs.species == 'CAGN') & recs.xc_id.isin([17808]))
        | ((recs.species == 'LOSH') & recs.xc_id.isin([255158]))
        # | ((recs.species == 'LOSH') & recs.xc_id.isin([255145]))
        # NOSH (Northern Shrike) used to be GGSH (Great Gray Shrike)
        | ((recs.species == 'GGSH') & recs.xc_id.isin([91968]))
        | ((recs.species == 'CASJ') & recs.xc_id.isin([347904]))
        | ((recs.species == 'STJA') & recs.xc_id.isin([146610]))
    )]
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # View
    .pipe(_recs_show, scale=2.9)
    # Sort: by species, after applying df_ordered_cats_like with the ebird shorthand column
    .pipe(df_ordered_cats_like, species=metadata.ebird.df.shorthand)
    .sort_values(by=['species'])
)
# BEWR HOWR PAWR
# BGGN
# CATH
# NOMO
# LEGO
# BRCR
# RBNU WBNU PYNU
# OATI CBCH
# HUVI WAVI CAVI
# WEKI
# BLPH SAPH WEWP OSFL ATFL PSFL
# RBSA
# HAWO DOWO NUWO ACWO PIWO NOFL
# RTHA(age) RSHA COHA SSHA AMKE MERL
# COLO
# WCSP
# - Around bay area?
(
    search_recs
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Filter: WCSP recs whose state mentions ca/california as a whole word
    [lambda recs: (
        (recs.species == 'WCSP')
        & recs.state.str.lower().str.match(r'.*\b(ca|california)\b.*')
    )]
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Sort: by subspecies, then north to south within each; alternatives tried:
    # .sort_values(['lat'], ascending=False)
    # .sort_values(['lng'], ascending=True)
    .sort_values(by=['subspecies', 'lat'], ascending=[True, False])
    # View (default scale; scale=3 was tried and disabled)
    .pipe(_recs_show)
    .head(500)
)
# SOSP
# - Don't know what to do with this one
# - By subspecies?
(
    search_recs
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Filter: all SOSP recs
    [lambda recs: recs.species == 'SOSP']
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Sort: keep recs with a non-empty subspecies, take a seeded sample of 100, order by subspecies
    [lambda recs: ~recs.subspecies.isin([''])]
    .sample(100, random_state=0)
    .sort_values(by=['subspecies'], ascending=[True])
    # View (default scale; scale=3 was tried and disabled)
    .pipe(_recs_show)
    .head(100)
)
# BEWR
# - Don't know what to do with this one
# - Around US?
# - Across individuals?
(
    search_recs
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Filter: all BEWR recs
    [lambda recs: recs.species == 'BEWR']
    .pipe(df_inspect, lambda recs: (recs.shape,))
    # Sort: seeded sample of 100, ordered by ascending lng
    .sample(100, random_state=0)
    .sort_values(by=['lng'], ascending=[True])
    # View (default scale; scale=3 was tried and disabled)
    .pipe(_recs_show)
    .head(100)
)
import pptx
# Open the talk deck with python-pptx for ad-hoc inspection
# - The bare expressions below assign nothing; each one only evaluates a lookup on the deck
# - dirs is not defined in this file -- presumably a dir()-like helper from `notebooks`; confirm
path = os.path.expanduser('~/Desktop/Birds.pptx')
prs = pptx.Presentation(path)
# The presentation object and its slides collection
dirs(prs)
dirs(prs.slides)
list(prs.slides)
# One slide (index 5) and its shapes
dirs(prs.slides[5])
list(prs.slides[5].shapes)
# The last slide: its shapes, then the first shape and its text
list(prs.slides[-1].shapes)
dir(prs.slides[-1].shapes[0])
prs.slides[-1].shapes[0].text
# The last slide's second shape: its shape_type next to the PICTURE enum member, for comparison
dir(prs.slides[-1].shapes[1])
(prs.slides[-1].shapes[1].shape_type, pptx.enum.shapes.MSO_SHAPE_TYPE.PICTURE)
# Enum namespaces for shape and media types
dirs(pptx.enum.shapes.MSO_SHAPE_TYPE)
dirs(pptx.enum.shapes.PP_MEDIA_TYPE)
# The second shape's click_action, and the shapes collection itself
dir(prs.slides[-1].shapes[1].click_action)
dir(prs.slides[-1].shapes)